{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导入相关库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "import re\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix,f1_score,classification_report\n",
    "from sklearn.feature_extraction.text import TfidfTransformer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "import jieba as jb\n",
    "import matplotlib.pyplot as plt\n",
    "import itertools\n",
    "plt.rcParams['font.sans-serif']=\"SimHei\"\n",
    "plt.rcParams['axes.unicode_minus']=False\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "#要爬取的新闻分类地址国内、国际、军事、航空、科技\n",
    "url_list={'国内':[ 'https://temp.163.com/special/00804KVA/cm_guonei.js?callback=data_callback',\n",
    "                   'https://temp.163.com/special/00804KVA/cm_guonei_0{}.js?callback=data_callback'],\n",
    "         '国际':['https://temp.163.com/special/00804KVA/cm_guoji.js?callback=data_callback',\n",
    "                 'https://temp.163.com/special/00804KVA/cm_guoji_0{}.js?callback=data_callback'],\n",
    "         '军事':['https://temp.163.com/special/00804KVA/cm_war.js?callback=data_callback',\n",
    "               'https://temp.163.com/special/00804KVA/cm_war_0{}.js?callback=data_callback'],\n",
    "         '航空':['https://temp.163.com/special/00804KVA/cm_hangkong.js?callback=data_callback&a=2',\n",
    "               'https://temp.163.com/special/00804KVA/cm_hangkong_0{}.js?callback=data_callback&a=2'],\n",
    "         '科技':['https://tech.163.com/special/00097UHL/tech_datalist.js?callback=data_callback',\n",
    "              'https://tech.163.com/special/00097UHL/tech_datalist_0{}.js?callback=data_callback']}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 爬取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=========正在爬取国内新闻===========\n",
      "https://temp.163.com/special/00804KVA/cm_guonei.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_02.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_03.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_04.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_05.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_06.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_07.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guonei_08.js?callback=data_callback\n",
      "=========正在爬取国际新闻===========\n",
      "https://temp.163.com/special/00804KVA/cm_guoji.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_02.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_03.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_04.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_05.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_06.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_07.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_guoji_08.js?callback=data_callback\n",
      "=========正在爬取军事新闻===========\n",
      "https://temp.163.com/special/00804KVA/cm_war.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_02.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_03.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_04.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_05.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_06.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_07.js?callback=data_callback\n",
      "https://temp.163.com/special/00804KVA/cm_war_08.js?callback=data_callback\n",
      "=========正在爬取航空新闻===========\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_02.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_03.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_04.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_05.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_06.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_07.js?callback=data_callback&a=2\n",
      "https://temp.163.com/special/00804KVA/cm_hangkong_08.js?callback=data_callback&a=2\n",
      "=========正在爬取科技新闻===========\n",
      "https://tech.163.com/special/00097UHL/tech_datalist.js?callback=data_callback\n",
      "https://tech.163.com/special/00097UHL/tech_datalist_02.js?callback=data_callback\n",
      "https://tech.163.com/special/00097UHL/tech_datalist_03.js?callback=data_callback\n",
      "爬取完毕！\n"
     ]
    }
   ],
   "source": [
    "def parse_class(url):\n",
    "    '''获取分类下的新闻'''\n",
    "    req=requests.get(url)\n",
    "    text=req.text\n",
    "    res=re.findall(\"title(.*?)\\\\n\",text)\n",
    "    #去除不规范的符号\n",
    "    for i in range(len(res)):\n",
    "        res[i]=re.sub(\"\\'|\\\"|\\:|'|,|\",\"\",res[i])\n",
    "    return res\n",
    "titles=[]\n",
    "categories=[]\n",
    "def get_result(url):\n",
    "    global titles,categories\n",
    "    temp=parse_class(url)\n",
    "    if temp[0]=='>网易-404</title>':\n",
    "        return False\n",
    "    print(url)\n",
    "    titles.extend(temp)\n",
    "    temp_class=[key for i in range(len(temp))]\n",
    "    categories.extend(temp_class)\n",
    "    return True\n",
    "\n",
    "for key in url_list.keys():\n",
    "    #按分类分别爬取\n",
    "    print(\"=========正在爬取{}新闻===========\".format(key))\n",
    "    #遍历每个分类中的子链接\n",
    "    #首先获取首页\n",
    "    get_result(url_list[key][0])\n",
    "    #循环获取加载更多得到的页面\n",
    "    for i in range(1,10):\n",
    "        try:\n",
    "            if get_result(url_list[key][1].format(i)):\n",
    "                pass\n",
    "            else:\n",
    "                continue\n",
    "        except:\n",
    "            break\n",
    "print(\"爬取完毕！\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据合并\n",
    " * 由于新闻具有实时性，每天，或者每天的不同时间段会有不同的新闻产生，因此在保存前将此次爬取中与之前保存的数据中重复内容删除再保存，以达到扩充数据集的目的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "更新数据集...\n"
     ]
    },
    {
     "ename": "UnicodeEncodeError",
     "evalue": "'gbk' codec can't encode character '\\ufffd' in position 8: illegal multibyte sequence",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mUnicodeEncodeError\u001b[0m                        Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-3-67f2ee6e14bb>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     13\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"更新数据集...\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mold\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mnew\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 15\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"新闻数据集.csv\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'gbk'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     16\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"更新完毕，共有数据:\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"条\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     17\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mE:\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mto_csv\u001b[1;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, tupleize_cols, date_format, doublequote, escapechar, decimal)\u001b[0m\n\u001b[0;32m   1743\u001b[0m                                  \u001b[0mdoublequote\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdoublequote\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1744\u001b[0m                                  escapechar=escapechar, decimal=decimal)\n\u001b[1;32m-> 1745\u001b[1;33m         \u001b[0mformatter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1746\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1747\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mpath_or_buf\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mE:\\anaconda3\\lib\\site-packages\\pandas\\io\\formats\\csvs.py\u001b[0m in \u001b[0;36msave\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    149\u001b[0m                 \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwriter\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mUnicodeWriter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mwriter_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    150\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 151\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_save\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    152\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    153\u001b[0m         \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mE:\\anaconda3\\lib\\site-packages\\pandas\\io\\formats\\csvs.py\u001b[0m in \u001b[0;36m_save\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    261\u001b[0m                 \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    262\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 263\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_save_chunk\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstart_i\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mend_i\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    264\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    265\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_save_chunk\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstart_i\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mend_i\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mE:\\anaconda3\\lib\\site-packages\\pandas\\io\\formats\\csvs.py\u001b[0m in \u001b[0;36m_save_chunk\u001b[1;34m(self, start_i, end_i)\u001b[0m\n\u001b[0;32m    288\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    289\u001b[0m         libwriters.write_csv_rows(self.data, ix, self.nlevels,\n\u001b[1;32m--> 290\u001b[1;33m                                   self.cols, self.writer)\n\u001b[0m",
      "\u001b[1;32mpandas\\_libs\\writers.pyx\u001b[0m in \u001b[0;36mpandas._libs.writers.write_csv_rows\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mUnicodeEncodeError\u001b[0m: 'gbk' codec can't encode character '\\ufffd' in position 8: illegal multibyte sequence"
     ]
    }
   ],
   "source": [
    "new=pd.DataFrame({\n",
    "    \"新闻内容\":titles,\n",
    "    \"新闻类别\":categories\n",
    "})\n",
    "old=pd.read_csv(\"新闻数据集.csv\",encoding='gbk',engine='python')\n",
    "def update(old,new):\n",
    "    '''\n",
    "    更新数据集：将本次新爬取的数据加入到数据集中（去除掉了重复元素）\n",
    "    '''\n",
    "    data=new.append(old)\n",
    "    data=data.drop_duplicates()\n",
    "    return data\n",
    "print(\"更新数据集...\")\n",
    "df=update(old,new)\n",
    "df.to_csv(\"新闻数据集.csv\",index=None,encoding='gbk')\n",
    "print(\"更新完毕，共有数据:\",df.shape[0],\"条\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,1,'各个类别新闻爬取数目统计')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAESCAYAAADpO/4pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAG7pJREFUeJzt3X28lVWd9/HPFw4QATWgRxTrhNz5HCJ6MkFUVNRMHY1qtCxrrMhGrfR1T1rq5Dh66zhlNpoPmKWT9kBpYlmGpiSaY4LPk6lZmJqOKIjiI8Lv/mOtw9kc9uFs4XguWPv7fr3Oi73Xde1r//YF53utvdbaG0UEZmZWjn5VF2BmZr3LwW5mVhgHu5lZYRzsZmaFcbCbmRXGwW5vmCRV+XgzWz0He5OTNFDSQW8wbGdKOqqH424r6R/z7Y9IujjfPhA4/w3WuIWk2+u0f0vSBjX3+0saJWlvSadIGlGz7WhJgyT1k9TSwHO+RdJoSftKOkPS6G72U9fjSWqR1H81xz46H/9GSWMl/bOkt0m6SNJuPdXWCEmTJA2puT9I0iWSBtTZdzdJx9RpnyFpn96ox/pWj//ArXgHAkdExMxGdpZ0ALAtsLmkH0bEwm52fQk4XdKfgKXAq5LeCpwFHNflmP8P+EdgcW76P8DbI+KlfP+1fAwkvRP4PfA8sCFwUA7Ww4CfAFOAPwP35xo6jAZOAX4BnCtpeZd6R0fEhvk5TgUOBtqAefl4x0naE3grsAh4NiL2Ad4N/CxfYB4BhgGbANdKGg+8no//SEQckm+3ACflbUOAQyPiP/Lx/73eyZS0FLin3jZgLPCeiHg47zsEmAHsJ+l+QBHxqqQXgK2BeyUNBJZG+iDL/wDfkbQgIn5Uc9zX8o+tZxzsTSwH4r8C75Q0Nzf3B7YBJkTEnV32fzdwEbA/sDPwS0kHRMQzXfYbBDwKfAHYBfhj3rQTcCNwnaRBEfFqbl8KnBwR38mPnw+8JunjwETSxQCAiHhM0i7A2IiYKWkacE9E3C7p9Yi4t87rPAH4GjAiIh4Ddqizz5M1d5cDpwP/TArIFlKgnwYcSQr9G3I9D0s6mhTOR0raOe9zJPD7iGiXtBVwXn6etwB/AwIYDkwGbs499aER8eeOc1hzfgBejoj2rnXXnK+lNU3HABcALwCHAkfn5x0AtOc3Z4OAqcBjEfGspA8D35T04/CnFtd7DvbmdhYwC9gOOB64G7gCuLROqL+XFHJHAWOAuUArcLukz0XEDTW7zySF1lLgbaQe5ZOkkH8RuAUYJGlKRDwHCBgl6T358QNy2yvU7zEuzfXOBD4LHJDbV/TCJfUDWiLiNWAa8H3g6nwxW5R3Gw4sAyblPzssIwUvpF7/qC7bobMn3uGgXP8w4K7cQx6ct40G5ufbA4HdSBe5duABYCHweaBfvsC2AS9I2j4iXuh8SSsuvl1t0lFPvkB8nnRhmQF8JyImSDoTGBQRx0raICKerT1ARNyb/z4c6gVwsDcpSdsAmwH/QAqunwGPA3Mj4ls1+40CTiD1/D4eEbMkXUjqjf5bHmq5UtI9wNERcW9EvD8/dkvgUuBCUjA+BewKfCkiHqgp50Hgg8BHSUF3G93M/0jaF/gnYKmknwJbAhdJuiJt1t3AxqSgO5PUU14aEU9I+hBwYU191wFHRsRLdaYYOgJuMbA96cJUq+sY+swuPXZyjf2A9wAd7yReJg3tbEq6cD1HutBMBM7L5/RHwDdqQh1gWQ899g4vAN8EPkc6r9+VdCywQUR8VtKmwG8kfTQi7pL0JeAzwAjgCEknkt6dBOnfx3vzEM4g4PQuQzW2jnKwN6mI+APwQUmbAIcArwJDgc0kvR/4XUQ8D7yF1OseHxFP5Ie/Qu4dR8QPJf0GOJo0xoyk9wFHAJuTesvbAntFxGmSJpMmX6dHxNfzMa4ArpD0EjAu97KpE7YAt5OCcRnwK9KF4kVSOC6NiO0lnQb8KSIu7Xi5NY9/n6Rb8u1tuzk9/ejsoT9NCuEnScMzi0jDMN3NLdR6CNiCNNzyL7ntHcAzwJeBU0lDRAeT3qXsmPdpI5/LNyqH9TPA4cDuwEjgbOApSfNIY+xXAj+QNDYizgHOkXQpsDwidu04lqTLST3+2WtSi1XHwd6Eck/646QhiJeAy4HdImJpDuWPAmfmSbgJEfGp1R0vIp6mM7gg9fh+GhHX5+f7O9JFg4iYLWkn0vABefz5ItLwSn/SuD3AzcAf6jxdK3ATqefbRgqpAaShmUZW9jxKHu8mvROpZxidE6+PkIaq5gIndvRYc0+81lRJ7aTJ0I4VPDeQJoW3qBnaGgEcS5rb6Af8nPTaJwKzJQ0HhtWZlO7fw1AMua4NgOtJQ1izgV+Sztl5pJ78FaThq8ER0XU4qeuEsq2nHOzN6RnS0MCPSIG1KXB8TQ+5jRTut0TEi2tw/HOBFyV9Ld8fDWxU01MWMFTSccCtpPDbCPgO8CngvcCHSJOuK4V1nqz8IKnn+WXgi/n1/C+pd92TZcCSmtv1lj9uCizIz7dAaenixqR3Bh2ulnRGRNyW719VMxRzXF4RczlpeOurNfXPkzQpv+73AfsCH4iIlyVdBVxNmmBepe4Gh2KeI60OuoP0d/xYRCyTtCGwYlw9IhZRh6TDgB94rH395mBvQnni7CeSNgYejIjta7fnt+CvrWGoExEdk6DkQLmVFHJ/i4iT6jzkz3mp3+8i4nFJW5N65K+z8pLFjlUlF5J6vJCGir5PWiY4r7uaJI0lrXIZQ2dveQxp+eOxXXYfx8pj6jNIq4B2kfS53PY4cHeuZ0tgvNJa/XeSLozvJo1RLwA+JOniiHglP3YU6YJ6HbAVne92riLNC/xbd6+jJxGxDDhZaWnpfsBHJV1GWkK6YnhH0tCIWNLl4V8kvbO6inT+bT3lDyg1t64rPWqtrsfWwqqThytR+rDQgcBvgf9LmqAbIunXkiZ22XcLUrh1DJEMBV6KiKsi4iv5uTrq2ZfUq72DNOwxhzSO/wnSmDukXn5tT78/Kaink4ZVJkTEeFK4HkTqIS/PtWwDDImIv9HZ8fkmKbDvBD4JTIyIAyLi5VzDvsA3SEM7p+baBpJCehfSxeP3kjqWWf6J9G5pEHAisK2kqcCP87maLmnFWHc2QNLcej+koRjl+neVdGd+jr8nrXQ6hzRpHPk8DgDOlvSxmuO35fO+f35dth5zj725tQBb5pUktdqAy1bzuAGk4FpFHnv+Hmnibg7wwYh4KG8+VumTjP+Zx5InkoZUZpBWytwr6RTg06QxcyQdTBpyeRggf5BqpqQfk1bmPC/pKdI48rX5eQblnw7D8lr7W1jZqcBgUk/2uty2KWkoCVKQExEL8/DJ0aSLz7tzj7g9fyhoas3rb8nnpj+wd0Q8CXxd0oPAk/lx15ICd2pEPKW0Xv+fSGvhH8qTnP8i6eCad009rWPveL13AV8BbshDMOeS5jzOztvvJn3QqR9wcs1hzgZm13wojPw6as+jrSfkobTmlUN4eNc1zb1w3G2A+V1Cous+74qIR/ME7biI+F1u35rUW3803x9DWgd//eqOt5b11huWKEIeCnu+Y6WRNQcHu5lZYTzGbmZWGAe7mVlhKpk83XDDDWP06NFVPLWZ2Xpr3rx5z0REa0/7VRLso0ePZu7c7j5EZ2Zm9Uh6tJH9PBRjZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlaY9fb72EefcG3PO/WB+WfuX3UJcMrbq64gOWVx1RUw9rKxVZfAfZ+8r+oSrMm5x25mVhgHu5lZYRzsZmaFcbCbmRXGwW5mVhgHu5lZYRoOdknnSzow375E0m2STqrZvkqbmZn1vYaCXdKuwMYR8XNJU4H+ETEBGCNp83ptb2LNZma2Gj0Gu6QBwMXAfEkHAZOBGXnzLGBSN21mZlaBRnrshwN/AM4CdgKOAp7I2xYCI4EhddpWImmapLmS5i5YsGBt6zYzs240EuzjgekR8RRwOXAzMDhvG5qPsaRO20oiYnpEtEdEe2trj//JtpmZraFGgv1PwJh8ux0YTedQyzhgPjCvTpuZmVWgkS8BuwT4rqRDgQGk8fRrJI0C9gN2BgKY06XNzMwq0GOwR8QLwEdq2yRNBvYGzoqIxd21mZlZ31ujr+2NiEV0roLpts3MzPqeP3lqZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWHWaLmjma37Hthq66pLYOs/PlB1CU3JPXYzs8I42M3MCuNgNzMrjIPdzKwwDnYzs8I42M3MCuNgNzMrjIPdzKwwDnYzs8I42M3MCuNgNzMrjIPdzKwwDnYzs8I42M3MCuNgNzMrjIPdzKwwDnYzs8KsNtgltUj6q6TZ+WespH+VdIekb9fst0qbmZlVo6ce+3bADyNickRMBgYCk4CdgKclTZG0Y9e2N7NgMzNbvZ7+z9OdgQMk7QHcBzwIXBkRIenXwH7A4jptN3Q9kKRpwDSAtra2XnwJZmZWq6ce+x3AlIjYCRgADAaeyNsWAiOBIXXaVhER0yOiPSLaW1tb17pwMzOrr6ce+70R8Wq+PZfOcAcYSrowLKnTZmZmFekphL8vaZyk/sDBpN75pLxtHDAfmFenzczMKtJTj/1U4AeAgGuA04A5kr4FvD//PAqc0aXNzMwqstpgj4j7SStjVsirXvYHvhURf+muzczMqtFTj30VEfEy8NOe2szMrBqe6DQzK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArTULBLGinprnz7Ekm3STqpZvsqbWZmVo1Ge+xfBwZLmgr0j4gJwBhJm9dre7OKNTOznvUY7JL2BF4EngImAzPyplnApG7a6h1nmqS5kuYuWLBg7ao2M7NurTbYJQ0ETgZOyE1DgCfy7YXAyG7aVhER0yOiPSLaW1tb17ZuMzPrRk899hOA8yPiuXx/CTA43x6aH1+vzczMKtJTCE8BjpI0G9geOJDOoZZxwHxgXp02MzOrSMvqNkbEbh23c7j/PTBH0ihgP2BnIOq0mZlZRRoeNomIyRHxPGmy9L+BPSJicb22N6NQMzNrzGp77PVExCI6V8F022ZmZtXwRKeZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWEc7GZmhXGwm5kVxsFuZlYYB7uZWWEc7GZmhXGwm5kVpqFglzRC0t6SNnyzCzIzs7XTY7BLGg78AtgJuElSq6RLJN0m6aSa/VZpMzOzvtdIj3074LiIOB34NbAn0D8iJgBjJG0uaWrXtjevZDMzW52WnnaIiN8CSNqN1GsfAczIm2cBk4Dxddoerj2OpGnANIC2trZeKN3MzOppdIxdwCHAIiCAJ/KmhcBIYEidtpVExPSIaI+I9tbW1rWt28zMutFQsEdyFHAvMBEYnDcNzcdYUqfNzMwq0Mjk6fGSDs93/w44kzTUAjAOmA/Mq9NmZmYV6HGMHZgOzJD0GeB+4GrgZkmjgP2AnUnDM3O6tJmZWQUamTxdBOxd2yZpcm47KyIWd9dmZmZ9r5Ee+ypy2M/oqc3MzPqeJznNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK4yD3cysMA52M7PC9Bjskt4u6VeSZkn6maSBki6RdJukk2r2W6XNzMz6XiM99sOAsyNiH+Ap4FCgf0RMAMZI2lzS1K5tb17JZma2Oi097RAR59fcbQU+DpyT788CJgHjgRld2h6uPY6kacA0gLa2trUq2szMutfwGLukCcBw4DHgidy8EBgJDKnTtpKImB4R7RHR3traulZFm5lZ9xoKdkkjgHOBI4AlwOC8aWg+Rr02MzOrQCOTpwOBnwBfiYhHgXmkoRaAccD8btrMzKwCPY6xA58GdgBOlHQi8D3gE5JGAfsBOwMBzOnSZmZmFWhk8vQC4ILaNknXAHsDZ0XE4tw2uWubmZn1vUZ67KuIiEV0roLpts3MzPqeJznNzArjYDczK4yD3cysMA52M7PCONjNzArjYDczK8waLXc0M1uffPvIG6sugaMu3LPPnss9djOzwjjYzcwK42A3MyuMg93MrDAOdjOzwjjYzcwK42A3MyuMg93MrDAOdjOzwjjYzcwK42A3MyuMg93MrDAOdjOzwjjYzcwK42A3MyuMg93MrDANBbukkZLm5NsDJP1c0q2SjuiuzczMqtFjsEsaDlwGDMlNxwDzImIX4MOShnXTZmZmFWikx74MOAR4Pt+fDMzIt28G2rtpMzOzCvQY7BHxfEQsrmkaAjyRby8ERnbTthJJ0yTNlTR3wYIFa1e1mZl1a00mT5cAg/PtofkY9dpWEhHTI6I9ItpbW1vXpFYzM2vAmgT7PGBSvj0OmN9Nm5mZVaBlDR5zGfBLSbsC2wC3k4ZhuraZmVkFGu6xR8Tk/OejwN7ArcCUiFhWr+1NqNXMzBqwJj12IuJvdK6C6bbNzMz6nj95amZWGAe7mVlhHOxmZoVxsJuZFcbBbmZWGAe7mVlhHOxmZoVxsJuZFcbBbmZWGAe7mVlhHOxmZoVxsJuZFcbBbmZWGAe7mVlhHOxmZoVxsJuZFcbBbmZWGAe7mVlhHOxmZoVxsJuZFcbBbmZWGAe7mVlhHOxmZoVxsJuZFaZXg13SJZJuk3RSbx7XzMwa12vBLmkq0D8iJgBjJG3eW8c2M7PG9WaPfTIwI9+eBUzqxWObmVmDFBG9cyDpEuA/I+IeSfsAO0TEmTXbpwHT8t0tgQd75YnXzobAM1UXsY7wuejkc9HJ56LTunAu3hURrT3t1NKLT7gEGJxvD6XLu4GImA5M78XnW2uS5kZEe9V1rAt8Ljr5XHTyuei0Pp2L3hyKmUfn8Ms4YH4vHtvMzBrUmz32q4E5kkYB+wE79+KxzcysQb3WY4+I50kTqP8N7BERi3vr2G+idWpoqGI+F518Ljr5XHRab85Fr02empnZusGfPDUzK4yD3cysMA52M7PCNE2wS9pI0hZd2sZK2rKqmqok6R35p5+k4V22nS1pUFW1VUFSf0mH1dwfI+nLVdZk1ZI0uZv2VkmH9HE5b0hvLndc120LbCdpKbCU9GGqqcCLkpZHxMOVVtf3bgAeAX4BbCXp4oi4P2/bAXitssr6kKSrgQ8Dy4DPSLoSOBeYAHyjytqqIOkaYDPS74iAjtUV/YA7I+KIqmqrwInAbEn/BTwNfDsi/gIcQjo/66xmCvblwOvAHOA+oH++vxTYi7T2vpksAA4G7gYOBC4HJuZtEc2zXGpIRLwOIKkfKcCuB44jhX2zGRYRYzvu5K8CuTQiXpP0oKT+EVH8eZE0gM6L2nDgPOBMSX8Edgf2rqq2RjTNUEyNB0hr7QX8CPgvUsg3ow8APwH+wso99GYJdVj5tY4lfdDuM8CVwLWSbqqkquqsOB+SjiV9NqUjyKc0Q6hnNwI7SLoe2D4ifg+cAhwOzIkI99jXEXsBGwOtwNb5z3bgVWAjSVtExEMV1tcnJG0AnAQMiIiZkq6LiJDUJumrpAveu6qtsk/V/g7cHRH7SPo0cHlEvCrp+1UV1tckHQFsKGki0AbsCnykI8wj4rEq6+tLEbGrpOsjYm9J1+Uhu4XAbsAPJQ2PiEUVl9mtZuqxvyX/tACD8p+1bc3yC/w68DCwPL/d7Oh5vAo8nn9erai2Knw3TyD3B1okDQQ2IX09xonA56otr2/kYahhpN+Hg4DP59sbVVlXxebn8/I/wOERcUS+uE0HPlttaavXNJ88lbQ78B7SP9pbSF9YdjnwInAk6SuHf15dhX1L0hxgJrA8Is6WNDsiJudtN0XEHpUW2Eck3UoaU58NPEbqkR0MXEt69/K9iGiKiWRY+e9e0gdIE8iHRMS91VbW9yS9CxgPbAA8GBG35PaNgLER8Zsq61udZhqK6bAtaSJ1Jc0U6lkAFwE3SZpFehfTjPoBt5Emz3ch/WcxXyTNPezeTKGerejpRcQvJb1Mc825ACDpN6QL+wbAs8Bmku7LbS3AVyssr0fNFOz988+OpGA/jPRWcylwXYV1VWUT4FbgeOA0Ov8TFICBklo6VosUbjRp+ZpIQ1Tjge2BvwLnSaLJlvgtk3QvXVYESRLw1ojYov7DyhIRe+X/3nNKRFwg6Qzg+oi4UdIU4EPAXdVW2b1mGop5J7BxRNxR09ZG6pU1y/j6CpLaSeuSl0t6W/52TiR9iTTWekYzBLuk7UgX93qrPVqAfjXr+4u2uqWMOdhb1vXVIL1J0lBgZEQ8kodlDo2If5fULyJWede/LmmmYF8E/Ao4OiIWStqUNMv9i4jYq9rq+p6kG0hrcUew8lvticBREdEU6/olLSSt5V+pmXROBIyLiBF9XlgFujkXKzYD20XEBn1YUmUk7cHKF3uRPtswHXgBGBQR11dRWyOaaSjm7oj4mKQ7Je0GHENax95sY6gdPZHleZnjT0nr2JU3B3BxZcX1vXtIS2EPAX5MGqqbB5wSEV9rsnXsPhed9iOtIBsMvEL6vQjgCOAhUnY62NcBHb3SF/PPa/mnOd6yZJImAWfTuczxmSYbQ+4q8gXuE6SJ0zNIY+1t1ZZVCZ+LLCK+LOlg4EvAnsCFwDXAiIj4eqXFNaCZ1rF3rNV9Hdif9KnLk4FtJJ0vab3531HW0lzS6o+X8/1nJc2UdJWkWZIulLRZhfVVZThpCeww0jLQgdWWU6mmPxf58wx7krLi88D/ki52O1ZZV6Oaqcc+EPgd6QMXDwB/AGaRVkVcQfrHXLyIeAVA0nmShpHGDVWzy1jgAkkHNtNEGfB24L3A20ifd2i6MKvhcwHtEfGF3BkUaWjqBOCJastqTDMF+2vAR4CbgWNJy9nuAhZFxK1VFtaX8uqGfsB/kL4QrQWYQlryOSDfX9pMoZ7PyV+Bc4D3R8Q3JDXdhDr4XNT4qqTjgSXAx0hfR3I18HReTTcgIh6pssDVaaZgJyKelPQUadzwZNJX1jabiaR163NI5+F54B3ApcAngU+TvuioaeRx5fMiYpmk43Jzx8W+qeZgfC5WuBY4nzRRuhXpncshwE2kHvwA4FNVFdeTZlvueBcpvB4H9iV9tcDMZlvuKGkIaaz9LtI/0j1J32YH8ChwXUTMrqa6viXpOdISv45VD7VLHfuThqZam2RNv89FJmlH0jv8E4HfkubkjiENUX0hIv5aYXk9aqZg34i0xO+ZLu1TIuKGispaJ0j6AnBu7q1tDfwDcHqT/AKv9sMmktQs303vc9Epf0HeW0lr1nePiJty+3hgQUQ8XmV9PWmaYDczaxZNtdzRzKwZONjNzArjYDczK4yD3cysMA52M7PC/H/Q6pAPxIiS2gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df.新闻类别.value_counts().plot(kind='bar')\n",
    "plt.title(\"各个类别新闻爬取数目统计\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据清洗\n",
    "   * 去标点符号\n",
    "   * 去停用词\n",
    "   * 去数字\n",
    "   * 分词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache C:\\Users\\lzh\\AppData\\Local\\Temp\\jieba.cache\n",
      "Loading model cost 0.681 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "数据预处理完毕！\n"
     ]
    }
   ],
   "source": [
    "def remove_punctuation(line):\n",
    "    line = str(line)\n",
    "    if line.strip()=='':\n",
    "        return ''\n",
    "    rule = re.compile(u\"[^a-zA-Z0-9\\u4E00-\\u9FA5]\")\n",
    "    line = rule.sub('',line)\n",
    "    return line\n",
    " \n",
    "def stopwordslist(filepath):  \n",
    "    stopwords = [line.strip() for line in open(filepath, 'r', encoding=\"UTF-8\").readlines()]  \n",
    "    return stopwords  \n",
    " \n",
    "#加载停用词\n",
    "stopwords = stopwordslist(\"./stop_words.txt\")\n",
    "#删除除字母,数字，汉字以外的所有符号\n",
    "df['clean_review'] = df['新闻内容'].apply(remove_punctuation)\n",
    "#分词，并过滤停用词\n",
    "\n",
    "df['cut_review'] = df['clean_review'].apply(lambda x: \" \".join([w for w in list(jb.cut(x)) if w not in stopwords]))\n",
    "print(\"数据预处理完毕！\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TF-IDF转化，建立朴素贝叶斯模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "模型训练完毕！\n"
     ]
    }
   ],
   "source": [
    "#转词向量\n",
    "tfidf = TfidfVectorizer(norm='l2', ngram_range=(1, 2))\n",
    "features = tfidf.fit_transform(df.cut_review)\n",
    "labels = df.新闻类别\n",
    "#划分训练集\n",
    "x_train,x_test,y_train,y_test=train_test_split(features,labels,test_size=0.2,random_state=0)\n",
    "model=MultinomialNB().fit(x_train,y_train)\n",
    "y_pred=model.predict(x_test)\n",
    "print(\"模型训练完毕！\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 模型评估\n",
    " * 混淆矩阵\n",
    " * 分类评估报告"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeQAAAG2CAYAAACu6PUFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3XeYFFXWx/HvYQQEhiGIgCCirDmLqKggiGDOAq45grq6RtQ1re6aMKw5IIqi8ppWxYgkFZC0rqhgQIUFA6LEIQ9xzvvHrcERYWbQbqq65vfxqWe6qruqz5TNnD733rpl7o6IiIjEq0rcAYiIiIgSsoiISCIoIYuIiCSAErKIiEgCKCGLiIgkgBKySDnMzLJwzI0yfUwRyW1KyJJ6ZrZ99LOFmXVYx2t2MrOzosddzOzx6PFRwCNrvNbWWO9rZpub2UNm1qqCYd1gZndXMP4bzezUCh5XRHKUErKkmpkdAbwcJVEHeptZzbW8dAlwq5m1BVYAy6LX3Qm8Uep4WwOjzKwgWt8BaAtMB54GLllLDFPN7KtoGR8dowdwkJkNM7M+a7y+j5mNiJ4bBpwO3FWybmYfmFn/CvzuE83s52iZVt72Mo5ziJm9Y2Zvmtneazy3lZl9b2Z1ovUTzOxHM9s3Wj+xvOOLSKBmM0ktM6sC3Ahc52EGnKlmNgC4D+he6nXVge+Ai4H9ga+ip/YG3gMGmll1d1/m7pPN7ANggJkdApwD1AJGANUAN7MPgaruvkd0nK2Bl4C7ga+jY57p7v82s4eBwWuEfiuwxN1/NrNtgSeATkAVdy8ys52AVeX87jUBd/fGFdlexnFaAPcDpwDNgVfNrJn/MqPQA8Dt7j4/Wj8tWrqa2ST0pV+kwvSPRdKsBzDf3V8vte1aYD8z+1eppufXgTHApcDJQH/geOBeYDdgJKEqrhu9/hrgI6A2ISH3cvc2wADgeXffu1QyBqhKSMpjgZrAO8CFUfXbDbgsqnybR6/fn/AloAXhy8O1hC8Qfc1sG0JyL338tdkN+Gw9tq9LI+Cv7j7O3V8FNgbyAczsaGAroHep1xcA30c/jwdeW4/3EqnUVCFLKpnZQYQEu3/p7e6+KOpHHgKMMLNz3f3QaJ/tgL5AL0IF+jOhOfpSd59Y6hjFwKVmdj/wmwFfUWWe5+4rok3nADUIiauQkKSqAg8Bd7t7ZzMbSPTv0d2fNbNvovduAvwLmAWcCGwPdHf3UWu85/3AOHd/Jtq0B9DGzKYDRcBl7v5GGdvXyt3HRMevCpwfvcfCaP1eYB7wipk97+4vRutbAwsJrQRF6zq2iPyaKmRJHTOrTUh+NwBjzGy+mS0ws2lRn+m3wMOEpulVZraPmT0GPEqoRIcD1d39FuB24HUz67GWtyoiDPg628xGAmcTmr1HAX+PYtkJuJnQD30DsDPh392JwAtAu6hS3puoGTqK/0RC8/Z9wKdRXO8TKtSeZrZL6UDc/ZJSyRhgEXC1uzchNDc/HjXNr2t7ee4gfDF4MFrvQqieHwZeBh4ys0OBJ6NthUCNqN98ywocX6TSM91cQtLIzKq5+/Lo8Z3Aj+5+f7Q+ELjN3UdE6wcCG7n7kGi9LfAXdz8pWq8LbFZSJUfV4btAB+AioK6732RmNxEqxAeIKmQza0lIfE7oQ34CuAc4nN9WyOcDPwDjCIm+CLiKMOBsPiHJ/UBIgCe5+28GkJVxPj4ELnL3DyuyfR3H2CX6vf8E3AIUu/tl0XM3Ag3d/cJo/RKgPfA2UMPdH1zrQUVkNTVZSyqVJONIO349+rkpUHp08YPA4iipAGwJNIyqXgjN0vlmdrm7vwt0Bb5x95VRN/SZZtY+2m8lcAIhcd3k7h9HI7KPLPV+tYGOwGbAHmbWC9ix1Hsd6O6F0fqzZnYLofn8MUISXGVm48xsI3dfubbf38xOB14odR6aAsXr2r62Y0TH2R1Y6O7/c/fPoqbuLYAFwNxSL11K+DKCmf0JmAwcA0wF9lvX8UXkF0rIkmpmdgyhWh1bavNmwI8lK+6+c6nXNyA0OfcDprv79WscrxahOjy61Oa+pStkd79vLaH8mdAnPJfQj/sWoR+7PqFyLknIRwB/M7NlpfbdClgOdI5iAMgjNA2/sI5f/QCgadS3fFa0/2eEKnxt29dlV+AsMzuMUBk3BiYRRpXfZGa9gerAqcAV0T6HESr87sDmhOQtIuVxdy1aUrkQKrQfgb2i9QJCtTZtLa/NA44Cvoh+ViEMWhoE7Ffqde2BF0utXw7MJvRHTydU3t9EPw+OXnMQIenWJyTRzwgJeEvg5eg1g4At1/F73EJoVi7rd70fOL3UekNCX/jC6Ocu5WxvBkxYy3GN0IIwA/gcaF/quSsJFfAcwqVPJefx9OjxSYTLybaJ+7OgRUsuLOpDllSKKsCOhOQwLtp2FWGSjTs9GgAVjYh+itCs/QFws7t/U+o4BwO3AfUIiXnGGu9zNaGP9KYyYvkTIdm+G/VPd3L3v0d904cSmoyvADq6+8K17H8H8LO73/v7zoaI5AIlZEmlaKTyUv/l0qOyXrsj8K27LynjNc3d/btMxigiUpoSsoiISALoOmQREZEEUEIWERFJgJy+7KnKxgW+Ue2GcYeRaNtvXifuEHJCcbG6bspTfSN9fy/PSn2OKmTCpx/PdvdNN9T75RU0d1+ZmVlcvWjWII+m2820nE7IG9VuSIMT7ow7jEQbcPsRcYeQExYUrXV+DSmlRcNacYeQeHMWLS//RcLm9apv0AGSvrKI6tt1zcixln76cIOMHGgtcjohi4iIlM/Akt/Ck/wIRUREKgFVyCIikm4G2G/ulJo4SsgiIpJ+arIWERGRilCFLCIi6acmaxERkbjlxihrJWQREUm/HKiQk/+VQUREpBJQhSwiIulmqMlaREQkfqYmaxEREakYVcgiIpJ+arIWERFJADVZi4iISEWoQhYRkZTTxCAiIiLx092eREREEiIHKuTkRygiIlIJqEIWEZGUUx+yiIhIMlRJfh9y8r8yiIiIVAKqkEVEJN10cwkREZGEyIHLnpL/lUFERKQSUIUsIiIpp1HWIiIiyZADTdZKyCIikn6qkNPv9AO24uhWmwNQp0ZVPp5aSJP6NWhQuzoTvpvH1c99EnOEkktmz5rBZd1P5dn+Q3jo7lv4aOzIsH3mDI7pcgrd/toj5gglV3z/3VSuv/JSFi1cyO57tuLvt9wZd0hSjuR/ZUi4Z0ZMpfM9H9D5ng/4z+Q5/DyviFc//IHDbn+f/I03Ytct6sYdYiLMmjmD4w/vAMCKFSs466TjOe7Q9rzQr2+8gSXI/HmFXHvpeRQtWQLART2up+/LA+n78kC23m5Hju58cswRJsv53c6hXZt96XnbLXGHkki33XQdl1x5La++8x4/Tf+R0SOHxx1SfMwyt2SREnKGNK67MQ0KqvPt7MVs36SAghpVaVKvBtMLi+IOLXbz5hVy2V/OXZ1onur9CLvstgf9Bw5jwBv9WbRwYcwRJkNeXh7/evRp8mvX/tX2zz4dR6PNmtJosyYxRZY8r/V/lVWrVjF85BimTpnC5EmT4g4pcaZMnsQuu+0BwCYNNmXhgvkxRxQzq5KZJYuycnQza2Rm88xsWLSMKvV4WPRcczM7yMzaR8tdZnZzqfUOZtY4G/Flw5nt/sQzw6fw4eTZbF6/Jucc+Ccm/byQeYuXxx1a7PKq5PHIk/1WJ5oxo0Zw5LGdAdhnvzZM+HRcnOElRn7tAmoX1PnN9n59HuGUs8+PIaLkGjF8GCd06QrAQZ0OZvSokTFHlDxHHHM8995xC0PeeYth7w6mzQEd4g5JypGtdF8MfOru7YGTCX3VHd29fbRtINAY2LnU0hjYrNT6bkBOtPeawX7bNWD0N7O54ogduPq5T7h3wFdM/nkhJ+7XPO7wYle7oICCUommaMliGjcJ1V7duvWYNXNmXKEl3oL585g7exZbbNki7lASZcnixTRp0hSA+vXrM2PmjJgjSp5LelzDgR0P4flnn6LLSadRKz8/7pDilQNN1lkZ1OXus8zsDDMbSUi0GwFDzWwjIJ+QnGeb2ffAOOBLQjJeBWwJ7Ai0dff/ZSO+TNtn6wZ8MrUQgDq1qrFD0wLGTZlLy63qM+IrJZs11ayVz9KiIgoK6rB48WJqVvY/FGV4b9DbtO1wSNxhJE6t/HyKikJ30KJFi/Di4pgjSqaddtmNH6f9wMNPPBt3KDHLjeuQsxnhD8DLwGCgE3AP0A44AJgTvaYY+AwYCUwCvooefwasyGJsGdV+x0aMnTQbgAcHfs2dp7Tk63uPpm6tqrz23x9iji55dt1tD/47djQAE7+YQLNmakVYl1HDh9Kq9f5xh5E4LVvuyejRoZn6swnj2aL5lvEGlFCPPngP3f5yCTVq1ow7FKmAbF72dAjwd6BX9HNHYG+gGvAgIWGfAuQBbfilQq4f7V9jbQc1s+5Ad4C8/AbZi3499Hz9i9WPP/22kAP/OTTGaJKv80mnckbXY/lwzCi++for9mi1d9whJUrflweufnzXw0/FGElyHXXMsXRs35afpk9n8KB3GD5ybNwhJVKPa/4edwjJkQMTg2SzQh4JHAk8R6iSJwMvAM8Dc8ysObAY6BctY4AR0eMVwMlmttuaB3X33u7eyt1bVdn4twNgJLn+/eYQADZv1pz/e/VtWu2zL8/3H0BeXl7MkUmuKSgoYNC7w9h7n9YMHPI+derob4GUoeRuTwkfZZ2VCtnMOgHXACVDjOsCTYC7o/XqQF9gYanXLI2WRYSEvLjUc5IyjTdrwlHHdY47DMlh9erVo3M00lokDbI1qGsIMKRk3czaAKe6+6+u3TCzHsBhgPNLk/Xu0c/X3f3rbMQnIiKVSW4M6tpQU2fWW9t7ufvdwN1mdjRwA3C6u0/cQDGJiEhlkQN9yBskIbv7m8CbZTz/BvDGhohFREQqoRyokJMfoYiISCWguz2JiEj6qclaREQkZpYbg7qSH6GIiEgloApZRETST03WIiIi8bMcSMhqshYREUkAVcgiIpJqRm5UyErIIiKSbhYtCaeELCIiKWc5USGrD1lERCQBVCGLiEjq5UKFrIQsIiKplwsJWU3WIiIiCaAKWUREUi8XKmQlZBERSbccuexJTdYiIiIJoApZRERSzXLkOmQlZBERST0lZBERkQTIhYSsPmQREZEEUIUsIiKplwsVshKyiIikmy57EhERkYpShSwiIqmXC03WqpBFRCTVSq5DzsRS5vuY1TOzAWb2kZk9Fm3rY2ZjzOz68uJUQhYREcmM04D/c/dWQG0zuwrIc/d9gRZmtk1ZO6vJWkREUm8DNVnPAXY2s7pAM2A+8FL03GCgDTBpXTurQhYRkfSzDC3QIGqSLlm6l3qXkUBz4GJgIlAN+DF6bi7QqKwQVSGLiEi6WUYr5NlRk/Ta3Aic7+4LzOxy4Fbg8ei5fMopgnM6IbdolE/vy9vHHUaiPT9+Wtwh5ISjtmscdwiSAoWLlscdgsSrHrCLmY0F9gF6EpqpxwK7AV+XtXNOJ2QREZGK2EB9yLcDTxGarccA9wIfmFkT4DCgdVk7KyGLiEjqbYiE7O4fAjut8b7tgU7Ane4+v6z9lZBFRESyxN0L+WWkdZmUkEVEJNVKJgZJOiVkERFJv+TnY12HLCIikgSqkEVEJN0yex1y1ighi4hI6uVCQlaTtYiISAKoQhYRkdTLhQpZCVlERNIv+flYCVlERNIvFypk9SGLiIgkgCpkERFJNTPN1CUiIpIIuZCQ1WQtIiKSAKqQRUQk9XKhQlZCFhGR9Et+PlaTtYiISBKoQhYRkdRTk7WIiEjcdLcnERGR+BmQA/lYfcgiIiJJoApZRERSTjN1iYiIJEIO5GM1WYuIiCSBKmQREUk9NVmLiIjEzdRkLSIiIhWkCllERFLNgCpVkl8iq0L+gxYtXMCV53blirNP4LoLT2PF8uUA3HNTD0a9NzDm6JJl4dzZ3NftKIoWLaTP1Wfz+JVn8PQNF7ByxfK4Q0uM2bNmcOqxnX617YLTuzDx8wkxRZRc53c7h3Zt9qXnbbfEHUoizZ41kzOOPxiAH76byrl/PpLOh+xHr/vuiDmyeJhlZskmJeQ/aMgb/6brWRfwrydfoX6Dhnz4wbuM/2gMc2fPZP8Oh8YdXqK83et2VixbxidDX6dtl7PpdtfT1K6/KV9/OCLu0BJh/rxCrrnkPJYsWbJ625uvvkizLbdih513jTGy5Hmt/6usWrWK4SPHMHXKFCZPmhR3SIkyf14h1192HkVF4bP0fN/HuPCK63h50GhGjxjK3DmzYo5wwzOzjCzZlJiEbGZ5ccfwexx3yjnstf+BAMwvnEN+nbrcff2lNG66BSOHDog5uuSY/PEYqm5ck9r1G7Dfsaeybas2ACyeN4f8epvEHF0y5OXlcU+vp8mvXRuAeYVzufMf11CnTl3+M2p4zNEly4jhwzihS1cADup0MKNHjYw5omTJy8vjrkf6Uis/fJbq1tuEbyZ+wexZM1m+bDm1C+rGHKGsTVYSspnlmdl8Mxu2jmWemVVdY7e/mtk52YhnQ/j8k/+ycP48pk39H8233o6Tzv0rEz/7mFee7R13aLFbuWI5Q599iMO7X/mr7d998TFFCxfQfMc9YoosWfJrF1C7oM7q9acff4hDjjyOrqedw+v/fp73Br0dY3TJsmTxYpo0aQpA/fr1mTFzRswRJcuan6X923dkwicf8tyTj7L3/gew0UaVbPhQhpqrc7LJ2t1XAeOBDkAHd2/v7u2BA6NtnwKrzGxISZIGLgQuK5W038xGbNmwYF4hD9x8NVff9iCTJk7gqK5nsMmmjeh0dBc++Y++ub//3GPsd8wp1MgvWL1tyYJ5vPbAP+lyVc8YI0u2iZ9P4OQzu7Npw0YcevTxfDjmg7hDSoxa+fkUFRUBsGjRIry4OOaIkq3PI/dwyz2PcfHVN7Js6VLGjHgv7pA2qHBzieQ3WWfza1IxcARwoZltAcwH5gHPALh7sZk1dvddAMzsTuANdx8ZrX+RxdgyZsXy5dx4yVl0u+IGGjdtRtMtWjD9h28B+PqzT2ncpFm8ASbA5I9H879PxjD6tX5MnzyRl+74G/Nm/Mhh3XpQr3HTuMNLrC22bMEP339Li2224/PxH9Nkc32WSrRsuSejR49kn9at+WzCeLbZdru4Q0q0H7//jp+nT6P+Jpsy8fNPadfxsLhDkrXIdrvF28A7wKXAR8BIQqI+L3p+VanXtiZUziVWrO2AZtYd6A7QqMnmGQ53/b39cj+++XIC/XrdS79e93LY8Scx+v3BvDegPytXruCfDzwVd4ixu+D+51c/7nXpyTTbfhe+GDmY9/o9wnv9HqH10aewe4cjYowwmc79y2Vc3+NCHrv/LjauUYMH+zwXd0iJcdQxx9KxfVt+mj6dwYPeYfjIsXGHlGgXXnEtZ3U9nMI5s2l30KHss3+7uEPawHLj5hLm7tk5cGiGvgT4F7A5sABYCPwNuMvd25vZp+6+u5kdDlwMLAU+cPd/lTxX1ntsv/Pu3vvVytX0sr7+M70w7hBywlHbNY47hMTbctNacYfwK4WFhbw7dAht2h5A48bJ+P83+edFcYeQE3ZpVnucu7faUO9Xs8l2vm33RzJyrPH/6Ji12LNaIbv7eKCjmfUAPgeOBcaVfo2ZdQDuBQ4Cfgb6mdl5ax5LRKS0evXq0TkaaS2SBllNyGZ2A7AfUAs4FFgO3LTGy+YAJ7r7tGif0wiDzS7IZmwiIlJ55EKTdTYTcp6737y2J6JR1FUITebjSz/n7iui1yTmGmkREclhOXJziawk5CiZ7hr1I6/N7tF7/6ZTKhqRPRQYk43YRESkcim57CnpspKQo0uaNnX38iYp3not+35vZjuVVMoiIiKVQdaarCuQjMvaV8lYREQyJgcKZN1+UURE0i8Xmqw1cEpERCQBVCGLiEjq5UCBrIQsIiIpZ2qyFhERkQpShSwiIqkWrkOOO4ryKSGLiEjK5cbdntRkLSIikgCqkEVEJPVyoEBWQhYRkfTLhSZrJWQREUm3HLnbk/qQRUREEkAVsoiIpFqlvv2iiIhIkuRCQlaTtYiISAKoQhYRkdTLgQJZCVlERNJPTdYiIiJSIaqQRUQk3XLkOmQlZBERSTXLkZtLKCGLiEjq5UA+Vh+yiIhIEqhCFhGR1KuSAyWyErKIiKReDuRjNVmLiIhkkpk9YmZHRY/7mNkYM7u+vP2UkEVEJNXMwsQgmVjKfy9rCzR29zfN7Hggz933BVqY2TZl7auELCIiqVfFMrOUxcyqAo8D35rZMUB74KXo6cFAmzJj/MO/pYiISOXRwMw+KrV0L/Xc6cCXwJ3A3sCFwI/Rc3OBRmUdWIO6REQk9TI4Mchsd2+1juf2AHq7+89m1g/YD6gRPZdPOUVwTifkJStXMX7m/LjDSLSzWjWPO4Sc0L7n+3GHkHgf3dQp7hASr9g97hBkHTbQKOvJQIvocStgS0Iz9VhgN+DrsnbO6YQsIiJSHiNMn7kB9AGeNLM/A1UJfchvmFkT4DCgdVk7KyGLiIhkgLsvBLqU3mZm7YFOwJ3uXmaTrhKyiIikXnkjpLPF3Qv5ZaR1mZSQRUQk3Sp4DXHcdNmTiIhIAqhCFhGR1MuBAlkJWURE0s3Ijbs9qclaREQkAVQhi4hI6uVAgayELCIi6ZcLo6yVkEVEJNXC7RfjjqJ86kMWERFJAFXIIiKSerkwyloJWUREUi/56VhN1iIiIomgCllERFJPo6xFRERiFmbqijuK8qnJWkREJAHKTMhmVsXMjii13snMDs1+WCIiIhkS3X4xE0s2lZmQ3b0YuNjMmkabjgdalTxvZu9lMTYREZGMKJkc5I8u2VRehVwH2BU4yszygMOBCWZ2u5ltBxRnNzwREZHKobw+5H7Az0Bf4DTgNeCvwADgBmC3bAYnIiKSCbnQZF3eKOtjgcuAB4H6wEnA68BU4EmgYVajExER+YPSMsp6C0IV/CZQFciLtp9b6rGIiEii5UKFXJGE3AHYCngaeAqoDtwDNM5qZCIiIpVIeU3WVwNTgEfcfYWZnQzcCxxDaLYWERFJvBxosS47Ibv74WZ2HqE6PpnQl3yZux8DYGa1sh+iiIjI72eWkrs9uftjZvZ+tPoBsHGp5/bPVmAiIiKVSYWmznT3b8zsVHdf5e4DzewAMzsm28GJiIhkQhomBnnfzPKj1bOjqTT7ATcCc7IbmqTVVZddxKABb8UdhkiqzZk1kzNPOASAiZ99SveTjuaM4zrxdO8HY44sHrkwyroiTdaLoofFhObqp4AxgGcxrpyyYO4sHu1xFlc/+RbP9byan7+dzE77HsghZ1wUd2iJM2bUSGbOmMEhhx8ZdyiJcuLem3PoLo0AqL1xVSZMm89GVYwWm9ZixDez6T1MYyjXdH63c5g48UsOO/wI/nbt9XGHkygL5hVy/eXnUVS0BICef7+SOx5+ikabNeWM4ztx0KFHsfkWW8YbpPxGeU3WpZPuloSZuy6Mfr5sZi9lKa6c8trDt7Ni2VLGDx9I8apiLu/1CrOnf8/MH/RHtLQVK1Zw+V/Pp9kWzXnnrTfiDidRXvxwGmf1GcdZfcYx7ttCfpizhCpmnNr7vzSrV4MtNqkZd4iJ8lr/V1m1ahXDR45h6pQpTJ40Ke6QEqVKXh53PtyX/PzaAMyfX0jjJptjZtSpW5/FixbGHOGGlwtN1uVVyKXffoq7H29mFwD/dvfZZvbkWncK817PBT5Zx3F3B9oBg4BvCdX3o8DZwKroNUvc/egK/RYx+mbcaKrXqEFB/U2Z9Ml/2KPD4QBsv1dbpkz4iIbNtoo5wuR48bln2W77Hbjosh480ethpk37nm7nqxWhtIa1q7NJfjUABn0+A4DRk+fQsnldvp+zJM7QEmXE8GGc0KUrAAd1OpjRo0ay9TbbxBxVcuTXLvjV+u6tWvN838eoU7ce06d9z7Y77BxTZPEwLCdGWZdXIY8ys43NbCOgpC95I+AlM3vY3c9e207uvgoYT5hUpIO7t3f39sCB0bZPgZXAQOAfwFDgeeAgd+/o7h2Bzn/sV8u+lSuWM/DphzjqvKsBWL50CXU3DfOl1Cyoy8LC2XGGlzifTfiU0846l0aNGtPlxJMZNWJ43CElzkmtm/Hih9OoUS2PmQuWAjC/aMXqJC3BksWLadIk3ISufv36zJg5I+aIku2G2+9nqz9tywtP9+asCy7Nel9o4mSoOo51UJe7Xw/UBl4EzjeztwBz9w7AI+Ucuxg4AhhgZl+a2RjCTSlOjJ4vqYQPBIZF71f67lEroy8CiTWkXy/aHncqNaNvo9Vr1GL5svBHdHnRYrxYN8MqbasWf+K7b6cA8Okn49h8iy1ijihZzGCvrerx36mFLFm2iuobhdlpa1bbKCfm4d2QauXnU1RUBMCiRYv0b60ceXl5bPmn0IJwxHEnlvNqiUuZCc/M3ib0I+8K3ApsByw3s/ZAFTOr5u6Hl3GIt4F3gEuBj4CRhER9XqnXtAF6A++b2SrC3NmfR697Anh2/X+tDeObcaOY9PEYPnj1WX6c/CWFM6ZTt+FmbLXTHvw4eSINt2gRd4iJcurpZ3PxX7rR/+WXWLliBU/2ezHukBJlz+b1+GzafAC+nL6Als3rMmHafLZrnM/U2WquLq1lyz0ZPXok+7RuzWcTxrPNttvFHVLiPXTXzVz6t39Wvuo4kgu/d3kV6AlAHeDfhFsvHgssAa4BfiLccKIsuwD/AjYHugILgb+t8ZqXgAOATsAyYEj0PsXu/pu/QmbWHegOUK9Rk3LePrsueeiXhPLAX0+iW8/e3H/hicyfPYOJ/xnO5b1eiTG65MmvXZsnn30h7jASa/9tNmHct/MAeHfiTJ45dy82LahO22024eTH/htzdMly1DHH0rF9W36aPp3Bg95h+MixcYeUSH1eGrD68S33PhZjJPGr0KQbMStv6syl0QCt3u7eD+hjZq2BU9y9J6EfuKz9xwMdzawHoeo9Fhi3xsteB+4HmhKSPISm7k6Eu0qteczehIqaLbbfJTGXXl384POrf37135F0PPk8auQXlLOXyC/uHzJ59ePFy1ZxVp/RTCXSAAAgAElEQVSP2HfrTXjyg29ZtKzMf2qVTkFBAYPeHca7Q4dweY+rqFOnTtwhifxhFbkOeTHhMqcSeVEyxsyqu/uyde1rZjcA+wG1gEOB5cBNpV5SALwMPE5Ivm2BPxMGe91oZlu7+2RySM3adWjZ4Yi4w5AUWLB05eqR1vJb9erVo3M00lqkLEY6mqwxs0nAfHdvFW26DWhnZscBewHXrmPXPHe/eR3HHEa4n/ICwuVPpwKvu3tRNJBrFaGpuzPQs+K/joiIyG/lwsDIioxi/tbdO5VaXxw1Y19DaFr+DTOrAuwaJd612Z2QkHH3RWbWF8iLpuU0wjXIfSr2K4iIiOS+iiRkN7OdgHrAN9G2kop21lp3cC82s03dfXk5xz4zev3SUscVERHJqJyukM2sKqHJuDawA6F/dzNCM/Uw4N6yDlyBZCwiIpJ1YVKP5GfkskaCNyCMdF7p7i8T+pG7Eq4nrgtcvAHiExER+cOqWGaWrMa4rifc/adoasylZrY3sLGZHUmYqeta4Egza5jd8ERERCqHilwr7YS+477AyYQZtAD6EC5REhERSbScn8s60hz4gHCJ0s38cpOJQYS+ZRERkcQyoIpZRpZsqsjEIL+aJNbM7jSzs939STO7JHuhiYiIVB5lVshmtm/Ub7yau78FnGJmdYHKPTmqiIjkhCoZWrKpvAq5CmHCjvGEGz8YoU+5OXAW8H52wxMREfnjcuCqp3ITfsnNG+YSbqU4D3gXmABsy6/nuBYREZHfqbwK+SjgB0JiLlkA3N0vyGZgIiIimWAbYEBWJpQ1U1cVwl2aji7ZtMbztaI7QYmIiCRaDuTjMicGKQZeBB4t2VTqpwG9zKxBdsMTERH543J6pq41FACHEea1PhDYlTDC+vwsxSUiIlKplNeHnAdUK3UvZADM7D13H2lmnbMXmoiIyB9XMjFI0pWXkEexRt9x5HEAd7804xGJiIhkWA7k47ITsruvWsf257MTjoiISOVU7tSZIiIiOW0DDMjKBCVkERFJPVtr72uyZHtqThEREakAVcgiIpJqYZR13FGUTwlZRERSLxcSspqsRUREEkAVsoiIpJ7lwIXISsgiIpJq6kMWERFJAsuNmbrUhywiIpIASsgiIpJ6VcwyslSEmTUys0+ix33MbIyZXV9ujH/wdxQREUm0kj7kDXg/5LuBGmZ2PJDn7vsCLcxsm7J2UkIWERHJEDPrACwGfgbaAy9FTw0G2pS1rxKyiIiknllmFqCBmX1Uaun+y3tYNeAG4G/RplrAj9HjuUCjsmLM6VHW9WpU4/idm8YdRqKtKva4Q8gJg3q0izsESYEa1fLiDkHWyqiSuZtLzHb3Vut47m/AI+4+L7rueRFQI3oun3KK4JxOyCIiIgnSEehgZhcCuwNbAD8AY4HdgK/L2lkJWUREUs3YMNchu/sBq9/TbBhwNPCBmTUBDgNal7W/+pBFRCTdMjTCen1m+3L39u6+gDCwayxwoLvPL2sfVcgiIpJ6Fb2GONPcvZBfRlqXSRWyiIhIAqhCFhGRVNtQfch/lBKyiIikXlxN1utDTdYiIiIJoApZRERSLwcKZCVkERFJNyM3moNzIUYREZHUU4UsIiLpZmA50GathCwiIqmX/HSshCwiIiln6LInERERqSBVyCIiknrJr4+VkEVEpBLIgRZrNVmLiIgkgSpkERFJOdNlTyIiInHTTF0iIiJSYaqQRUQk9dRkLSIikgDJT8dqshYREUkEVcgiIpJuurmEiIhI/HJllLUSsoiIpF4uVMi58KVBREQk9VQhi4hI6iW/PlZCFhGRSiAHWqzVZJ0p3383ldO7HsPxh3Xgn9dfFXc4idX3icc47oiOHHdERw5q04orL/lL3CEljj5LFXN+t3No12Zfet52S9yhJNLsWTM46ZhOv9r2zcQvOLPrkTFFJOVRQs6Q2266jkuuvJZX33mPn6b/yOiRw+MOKZHOPPc8+r89lP5vD2WffdtwyhnnxB1S4uizVL7X+r/KqlWrGD5yDFOnTGHypElxh5Qo8+cVcvXF3Slasnj1Nnfn9pv+xsoVK2OMLB5hlLVlZMkmJeQMmTJ5ErvstgcAmzTYlIUL5sccUbL9NP1HZs2cwe4t94w7lMTRZ6l8I4YP44QuXQE4qNPBjB41MuaIkiUvL4/7HnuG/PyC1dteef4Z9tn/gBijipdZZpZsykpCNrNqZtbKzP5iZm+bWRszqx49d5eZHVTqte+tse9IM6uWjbiy6YhjjufeO25hyDtvMezdwbQ5oEPcISXaU48/yhnnnBd3GImkz1L5lixeTJMmTQGoX78+M2bOiDmiZMmvXUDtgjqr1wvnzuGNV17gnAsujTEqKU+2KuSaQBfgJOAadx8J3G1mxwGrgFVmVmBm2wJVzGxLM3vCzIYCOwIDzOxdM8uZv0SX9LiGAzsewvPPPkWXk06jVn5+3CElVnFxMaM+GM7+bdvFHUoi6bNUvlr5+RQVFQGwaNEivLg45oiS7e5b/84V1/2TqlWrxh1KTCxj/2VTtkZZLwSuA3oBC8wsD7gGeBv4NHpNQ6AjsAnQEjjf3Vea2VB372jhKu4cGBf3i5122Y0fp/3Aw088G3coiTZ29Ehatto7Jy7Uj4s+S2Vr2XJPRo8eyT6tW/PZhPFss+12cYeUaP8dM5LvpvwPgIlfTODenv/gsr/dGHNUG1Yu/LnJVkJuB9xMqHZ3Bp4DioEOwK0A7j4ZmGxmXQhJ+U4z2wnYwczeAgrd/bQ1D2xm3YHuAE033yJL4f8+jz54D93+cgk1ataMO5REG/buYFrv1ybuMBJNn6WyHXXMsXRs35afpk9n8KB3GD5ybNwhJdrg0eNXPz71uEMrXzKGrA/IygRz9+wc2KwOMA14iJCQ/0yoiucAA4FvgfMJzdqnAeOBZ939aDPbBzjR3S8v6z1222NPH/D+mKzEnxYbVUn+hzAJVhZn599BmmySn6yhHYWFhbw7dAht2h5A48aN4w4HgB/mLIk7hJywbeNa49y91QZ7v5129wdeGpKRYx22c8OsxZ7NiUEuAmYC/QnJuCfQFTga2AkYDQwFdnb3EQBmNtfMdgSuIDRxi4isVb169egcjbQWKdMGGCGdCdkaZd0S2Bt4n5CUNwaeIDRNFwO93f12dx+6xq5/BwYAX7v7/7IRm4iIVD6V9rInYDZwScmKu1/h7ie6+53A18BCM8szs45AVTNrbGZnAM8D/we0M7OLzaxhluITERFJlKw0Wbv79xCuRwbWHGdfFaju7quiy5quJQz82gw42t3nRNcsnw0cREjSIiIiv1u2L1nKhKzeXMLdT1/LtitKPb621FNDS21fBjyazdhERKRyMCAXxrdq6kwREZEE0O0XRUQk9Sp9k7WIiEgS5MJlT0rIIiKSerlQIasPWUREJAFUIYuISKrlyihrJWQREUm57N86MRPUZC0iIpIAqpBFRCTdcuTmEkrIIiKSejmQj9VkLSIikgSqkEVEJNXCKOvk18hKyCIiknrJT8dKyCIiUhnkQEZWH7KIiEgCqEIWEZHUy4WJQZSQRUQk9XJgTJearEVERJJAFbKIiKReDhTISsgiIlIJ5EBGVpO1iIhIAqhCFhGRVDM0ylpERCR+OXK3JzVZi4iIJIAqZBERSb0cKJCVkEVEpBLIgYyshCwiIilnOTGoS33IIiIiGWBmdczsHTMbbGb9zayamfUxszFmdn15+yshi4hI6pllZinHKcA97n4w8DPwZyDP3fcFWpjZNmXtnNNN1sUOy1cWxx1Gonle8ptpkqC42OMOQVJg10OvijsEWQsjo13IDczso1Lrvd29N4C7P1Jq+6bAqcB90fpgoA0waV0HzumELCIisoHNdvdWZb3AzPYF6gHfAj9Gm+cCLcvaT03WIiKSfpahpby3MasPPAicDSwCakRP5VNOzlVCFhGR1LMM/Vfme5hVA/4NXOPu3wHjCM3UALsRKuZ1UkIWERHJjHMIzdLXmdkwQk19mpndA3QF3i5rZ/Uhi4hI6m2Iuazd/VHg0V+/r70BdALudPf5Ze2vhCwiIqkX1/Um7l4IvFSR1yohi4hIumX4uqdsUR+yiIhIAqhCFhGR1MuFuayVkEVEJNWMDTOo649Sk7WIiEgCqEIWEZHUy4ECWQlZREQqgRzIyGqyFhERSQBVyCIiknoaZS0iIpIAuTDKWglZRERSLwfysfqQRUREkkAVsoiIpF8OlMhKyCIikmrh3hLJz8hqshYREUkAVcgiIpJuplHWIiIiiZAD+VhN1iIiIkmgCllERNIvB0pkJWQREUk5y4lR1krIIiKSerkwqEt9yCIiIgmgCllERFLNyIkuZCVkERGpBHIgI6vJOgNmzZxB1yMPAuDHad9z0jEHc8pxh3Lt5Rfi7jFHlwyzZs7g+MM7ALBixQrOOul4jju0PS/06xtvYAkza+YMOkefJYBJ33zFuad2iTGi5Dq/2zm0a7MvPW+7Je5QEqV5k0149YHzGdrnUnpeftxv1iW5lJD/oPnzCrnyom4ULVkCwPNP9+Hmux7g//oP5Kcfp/HVl5/HHGH85s0r5LK/nLv6HD3V+xF22W0P+g8cxoA3+rNo4cKYI0yG+fMKueKibixZHM7Td1OncNuN17JwwfyYI0ue1/q/yqpVqxg+cgxTp0xh8qRJcYeUGLdecgw9Hx9Ix3Puo2nDuox4tsev1tvuuU3cIcbCMvRfNmU1IZtZGzPLix7XMLO2Zby2rpk1yGY82VAlL48HnniW/Nq1Aehx3T/YetvtASgsnEP9TTaJM7xEyKuSxyNP9lt9jsaMGsGRx3YGYJ/92jDh03FxhpcYVfLyeOiJZ6kdnada+fn06vt8zFEl04jhwzihS1cADup0MKNHjYw5ouTYunlDPpn4AwCzChexpGjZr9br1N44zvBiY5aZJZuylpDNrBZwC7+03K8E7jGz2uvY5WTg+mzFky21axdQUFDnN9vf6v9vttluRxo1bhJDVMlSu+DX56hoyWIaNwnnpW7desyaOTOu0BJlzc9Sg00bUr169RgjSq4lixfTpElTAOrXr8+MmTNijig5+g/9hOvOO5zDD9iZTvvtQN/Xxvxq/f3/fB13iLIO2RzUNQ4YC3xlZlcBFxKScn8zqwJcDTwHlPxL2gioYmYlX3U3BTq5+/dZjDErvv92Ko8/ch/PvjIg7lASqWatfJYWFVFQUIfFixdTMz8/7pAkx9TKz6eoqAiARYsW4cXFMUeUHHc8MYj9dm/BpWd0pN+b//nN+uKi5XGHGIscGNOV1SbrT4FJwEjgXaCju+/r7h2BTsDHwDLgCOANoB2wD/AvoAMwJYuxZc38eYVcct7p3HH/Y2utnAV23W0P/jt2NAATv5hAs2bNY45Ick3LlnsyenT47v7ZhPFs0XzLeANKmPFfT6NZ43o80O+9ta5XOhlqrs52k3W2L3uaH73HKqCXmW0CNAC6uPusqHJ+D3g+ek11YHfgNuAhYHqW48u4Rx+4m+nTpvGPay4H4NKrbmCf/dfZdV4pdT7pVM7oeiwfjhnFN19/xR6t9o47JMkxRx1zLB3bt+Wn6dMZPOgdho8cG3dIiXLZGR15oN97FC1dsdZ1SSbLxmU5ZnYJcB3wJbA9sDXwvLsfZWZ9gR7ACuAZ4AbgM48CiZqztwIuBW5y9zlrHLs70B2gyebN9hz5yTcZjz9NquYls6Hm55+m89+xo2l3UKdEtCQUF+vytPI0rJOswUCFhYW8O3QIbdoeQOPGjeMOB4B6e10Udwg5YemnD49z91Yb6v123WNPH/DemIwcq1n96lmLPVsV8oPAfsAjwFnRtlWlX+Du883sBqA3sMzMGhEq5O+jn93XTMbRfr2jfdhl9z31VzRHNd6sCUcd1znuMCSH1atXj87RSGuRshi5MZd1thKyR8uZQNVo229G7rj7BKA1gJmdCWzu7rrKX0REMioH8nHWBnXtQKh0zwfygF34ZZCWA/ua2WVZem8REZGck60KuSswEDgdqA38A7ggeu4t4CJgtJmNJvQlO9AYqG5mHaPXVQE+dvdLsxSjiIhUEpW5yfo2YKW7vwc8bmbV3X0ZgLu/ArxiZtWA24EVvpaRZdEMX9WyFJ+IiFQi2Z72MhOykpDdffka68vKe81anl8FFGU4NBERkUTS7RdFRCT9kl8gKyGLiEj65UA+1u0XRUREkkAVsoiIpNqGmIc6E5SQRUQk9SrtKGsREZFESX4+Vh+yiIhIEqhCFhGR1MuBAlkJWURE0i8XBnWpyVpERCQBVCGLiEjKmUZZi4iIxM1Qk7WIiIhUkBKyiIhIAqjJWkREUi8XmqyVkEVEJPVyYVCXmqxFREQSQBWyiIikm+72JCIiEj8jN6bOVJO1iIhIAqhCFhGR9MuBElkJWUREUk+jrEVERKRCVCGLiEjqaZS1iIhIAuRAPlZCFhGRSiAHMrL6kEVERBJACVlERFLPMvRfue9j1sfMxpjZ9esboxKyiIikmhEGdWViKfN9zI4H8tx9X6CFmW2zXnG6++/+JeNmZrOA7+KOYw0NgNlxB5EDdJ7Kp3NUPp2jiknaeWru7ptuqDczs4GEc5AJGwNLS633dvfe0fs8AAx09wFm9meghrs/VdED5/Sgrg35P7SizOwjd28VdxxJp/NUPp2j8ukcVUxlP0/ufugGeqtawI/R47lAy/XZWU3WIiIimbEIqBE9zmc9c6wSsoiISGaMA9pEj3cDvl2fnXO6yTqhescdQI7QeSqfzlH5dI4qRudpw3gN+MDMmgCHAa3XZ+ecHtQlIiKSJGZWD+gEjHD3n9drXyVkERGR+KkPWUREJAGUkLPILBfuLyK5Qp8nkXRTQs4SM9sC2NbMNo47lqQqSTBmps9hGczsIjPLd/UvrVXJ58fMmppZQdzxiPxe+kOYBWbWAtgXaIfO8VqZmbm7m1k74FEz2yTumJLIzG4F9gMWR+u1440okfLNbG/gfqAm6Evemkp9+c2POxZZN31oMyxKMNsB9YEd+PUUa/KL+tG5uh4oBt4ws0xNbZcK0TR8Dd395OjLy1nAn80sT83XIemaWXNgIvAv4DygtpkVuHtxvNElS/T5ORwYEH15kQTSdcgZZGbHAccBPwPbEy4Mn25mvYC6QKG7L4oxxEQws72AHsBWwF/d/T9mdgXQ38yOdfc58UYYPzPrAMxw91uj9fOAY4Bu7r4q1uASwt2Lzaw+MBNYDnSJlk/M7Bp3XxFrgAlgZrUIhUE14HJgKNDJzD539yWxBie/oQo5Q6Jv6jUJ1V4nYCXQDzgWuBM4DdhalQ0AWxC+rLwDfGxmVd39X8BY4LnKXimb2R3AJcDd0fp5hObYh4ECM3vVzPY2s61jDDNWZnZQ9PAsoD0wBDgXuACYo2S8utl+R+AewmfpNWASsBdwvZnVjTE8WQtVyBkQ3WJrJ8LE4nWBhYT5TNsRplKrCewCvFHZB+aYWUegG3At0BboA6wws5qEBD0XeNPMDnf3wvgijYeZPQm0AJ4D9jCznYHjCcmmG/AKMJjQPDsYmBxTqLExsxrATWa2I+Hzsx3hszTG3b8xs51LNcvWAKa4+w8xhRuLaIxGsZltCuQRCoIi4HTgIuAMQquCJIgq5D/IzPYk3G5zK6AzofobAWxG+MB/Dgwg/NFoGlOYsTOz3aKHtwLPuvurhBaELYAXgBuB5939duAnQgKqVKLP0lhCki0gfMn7FOgOvAV8ApwCDHf3c9z9xbhijYuZbezuRcBVhKRyBbAN4QvKCjO7HNgfuA84mVAhLo4p3NhEfcaHEM7PWHf/n7tPd/eewJ8JX2BWQGhtMLP1uiuRZIdm6voDzKw3cA7hj+SbQG1C81BjQtPQQsIdP3Yg/BH9e0yhxirqQz+HcCuyzYF/Ame4+5dm9kb0uDB6bT1Cor7a3T+PK+YNrdQ56kr43FwAbA30IiTmusBjhPEJzwEnuXulqo7NrOTcjCN8uW1AOE9z3f3cUq97GXjM3YeYWZXKOMDLzJoCHxK+zLUAvnT3d6PukA7uvlf0upOBs4GL3P2r2AIWQBXy72Zm9wI/EJLMJHdfHM1b+nG0vACMB/4ErCpJxpWtDzk6T7OAAwn9WN8ANwB9zWx/4AtCkgYgSszHV7JkfC/h5vEHAOcTujjuIZyb0YQvd18DFxIS87JoqWxqEvo/twdmuvsQwudpdzM7DVYPGFwUPVeplIy9MLMDgZ0JrQTtCIMBPzSzq4CDgffMbC8z60JoZVAyTghVyL+Dmf0D2Njdrzaz/sAgoI+7r4iaHXci3BfzFMIfkaqEwV2LK1MfcpRoioErgaOBTQnNiN0IX1RuB/7h7q9X4kqm9Dk6BqgHnEr4Y+qEUcOtouc3dvdXzKyBu8+OKeQNLppcZ3tCS9OphMrvXWAC4XM1kdA0OwYYRUhC9xNabivFvzczqwPcRBjHsgNwpbuPjfqQHwS+JyTp0wnn8kbCZ+0Ud/86lqDlN5SQ15OZ3QV0JDRRb0xIvLcA3d29l5ntTmgi2pUw0vp+wnleEFPIsTCzewh/EK+ImvanuHvPqNnxfEJTY3PgH8CZlfGPQhnn6ARCNXwfsITwR/S8qO+0UjGzG4CPCP2dTmhNmUMYk9EIWOjuN0eV8RnAvYRR1vNiCjk2ZnY2odujM6ElZZ67/xQVCfOB76KioWQ0+imqjJNFTdbrwcyeIIwCPo4w6KiY0Dx9DfCTmR1JaGr8BHjA3W9x94WVMBn3ISSSK82sH9CEcI/QVwjXjN5J6A/9H6EZ+xkzq1aZmvPLOUdzCa0HlxGuH30HuN3Mqlayc9SCMGBrkrsPBaYSWpzaEFqhWgKfRedkLqFr5PvKlIzN7E/RzyaEMSznEr6sPAPcaGabuvs4d58cJeMTCQn7ZCXj5FGFXEFmti3wJXAC4cbT+xEGTFxFaJJ+htBE/Yi7Dy61n1WWZjMAM9uc0Pd5FqGS2ZPQinA2MIxQ4cwA3iMk5UMI39wrzYQp63GOBhMu67kIGFrJzlEe4YvvZYTm6emEJtZvgO8IVzUcTDh/w939HTOrUdlaEczsQsL4jF2BvxI+M28Qztt2hDkRbiZc978XoVvkXHf/MpaApUyqkCvI3b8h9E3NIYzyHE7oF36DUPX1JPQjD47+mJTsV2mSMYC7TwNaE5rOdnf3e4AjCedsLmEE+iOEP6rzgeLKlGhgvc7R9Gj960p4jlYRzkcHoBlwF+FSwmWEf4eHEbqKxgFtzKxWZUvGkQmES+QGE7rSWhC+xF1DuPxyDOE8HUy44uNkJePk0sQg68HdRwGY2YfAwGiZQxigNAg4wMxGVsYJLUpz94nR9aAPm9kCwrfyrQitCOcQLlm5kTCautL1HcN6naMulfgcTTWz7YFNCF+A2wCPAlsCj7v7d8B3ZjbI3SvdtcaRWcAqQotKHcKMZYcC1YGnCBOCFBIGer3p7t/GE6ZUhJqsfwcz24gwmnFf4C+EfuOJhMkJHnP3GTGGlxjRH9OHCLNLNQZeJtx04+/AherD0jkqTzSD2wrCF5SBhEGSr7v79MrWHbQu0bX7jxGq4+GEZHwLoTtohbs/Z2Z5mgM9+dRk/Tu4+0rC4JKLgQfdfRih2fouJeNfRMnkQsKlKV8TRsn+HV33uJrOUdncfUk0L3Ue8BmhFeEGM2umZBxELXJTCTMDfk+YMKUnsAfhSx5KxrlBFfLvZOEuKo3cfUplvYa2oqIBcYMITWuHR/3xUorOUdnMrCqwo7uPN7OLgH/ry+8vzKw6YZT1HYRLMpcSuj5ucvcv4oxNKk4JWTaI6EYAVSrTDFzrS+eobGZWzd11Q4QymNkehMFd44EJ0eyBkiM0qEs2CI3sLJ/OUdmUjCvke0Lz/kjX/Y5zjipkEZEUie6ItTTuOGT9KSGLiIgkgEZZi4iIJIASskgWRfNPVym1vpGZVTGz2mXs0yK6tlREKhElZJEMMrO2ZjbEzN40sx8Jl568bmZzzOw14DXCPOhDzay9mf3bzPqa2YvRCFkIc1rvER2vhpkNiB7fambDSi3D13jvd6JjtogS/1tmVteCPEQk0TTKWiSD3P0DM7uDMH3hk+7eH+gVTe94bMnrzOxwws0SVgHXEW5M0sDMBgOjgWIzyyfMCOdm1hC4FVjq7sVR1Z1f6njnE+Y0fg54H/iUMAf0V4QJNR4gXJ8qIgmlClkk85YA+7h7fzNrHc19/p2Z9TKzCWbWGtjb3SdHr+9FuH/tCsJsSyWOI0yluRthasQVJRPQuHtxyW09zWw7oD3hRgx3AS8RbkK/L/BP4Gx3VzIWSThVyCIZZGanEG7LaWY2jDD/8gDC7RXHEG6t+AXw6v+3d8euUURRFId/tzCCkDLGKJYGCxFMZdAiFpaaWJnCRlAwIvkX3FjEwsbCSkTFIEEhKAiKhRBCiGJhZSEWATFaiN2SQGDDtThvyBLbGZjifM3uwrLLVGfffbPvREQVyDvAf53ZmblQTmC6BHSAlYjYRMdH/kA/qB9n5jNgunz/T+A6Cvdp4GE56tXMWs6BbFavRTQ2foP6s3+jQ/8PAX8BMrMbEReBHhCoT7t6vtcU6rW9C5wp4+rXwBU0vu6VspOz6Gz1HnC5PN4DOmUP+lVmvm/mks2sDg5ksxpVI+WIAHXSjqFAPopOUapCdxIYRcE5CHTZDWfKZ4yjlfNXYBm4FRHVavc0cBXtPZ9HrWOPUDH9UnnPPhTSJ9HpTWbWYt5DNmtIadjZQuPlIWAb+FhuyJpFAfoc+I7G0MtoVVs5h0KdzLzP7p4ywCfgcERMZOa7zLyA9qGngM9obD4HjGfmncx82+ClmlkNHMhmNQstjwOgNO0MAh+AhfJ4Dd1J3UU3XXVQO88NVMF4HNjJzHk08o6IOAF8y8w/wACqabyN7tSurKMax1XgGOrnftrclZpZnTyyNqtRRAygsF0s//19gH743kQd2i9QAB9KqYoAAAB2SURBVL8EDgJzmbkREfPACDAMfEGrXNDYeX9pgJqJiCfAVmZuAit937uGVuP9TgEjpR50ooHLNbMa+SxrswZFxJHM/NX3+gCw7cJ4M9vLgWxmZtYC3kM2MzNrAQeymZlZCziQzczMWsCBbGZm1gIOZDMzsxb4B4gwfRbIX1cLAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 576x432 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "分类评估报告如下:\n",
      "\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "         军事       0.47      0.68      0.56       114\n",
      "         国内       0.56      0.41      0.47       102\n",
      "         国际       0.57      0.52      0.54       135\n",
      "         科技       0.00      0.00      0.00        39\n",
      "         航空       0.62      0.75      0.68       132\n",
      "\n",
      "avg / total       0.52      0.55      0.53       522\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# 绘制混淆矩阵函数\n",
    "def plot_confusion_matrix(cm, classes,\n",
    "                          normalize=False,\n",
    "                          title='Confusion matrix',\n",
    "                          cmap=plt.cm.Blues):\n",
    "    plt.figure(figsize=(8,6))\n",
    "    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
    "    plt.title(title)\n",
    "    plt.colorbar()\n",
    "    tick_marks = np.arange(len(classes))\n",
    "    plt.xticks(tick_marks, classes, rotation=45)\n",
    "    plt.yticks(tick_marks, classes)\n",
    "\n",
    "    fmt = '.2f' if normalize else 'd'\n",
    "    thresh = cm.max() / 2.\n",
    "    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
    "        plt.text(j, i, format(cm[i, j], fmt),\n",
    "                 horizontalalignment=\"center\",\n",
    "                 color=\"white\" if cm[i, j] > thresh else \"black\")\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.ylabel('真实标签')\n",
    "    plt.xlabel('预测标签')\n",
    "    plt.show()\n",
    "class_names=['军事','国内','国际','科技','航空']\n",
    "cm= confusion_matrix(y_test, y_pred)\n",
    "title=\"分类准确率:{:.2f}%\".format(accuracy_score(y_test,y_pred)*100)\n",
    "plot_confusion_matrix(cm,classes=class_names,title=title)\n",
    "print(\"分类评估报告如下:\\n\")\n",
    "print(classification_report(y_test,y_pred))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
